package com.xiaominge.utils.wordUtils.wordInputUtils;

import com.xiaominge.exception.ParameterRuntimeException;
import fr.opensagres.poi.xwpf.converter.core.IXWPFConverter;
import fr.opensagres.poi.xwpf.converter.core.ImageManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.web.multipart.MultipartFile;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Utility for converting uploaded Word documents to HTML.
 *
 * <p>Handles both the binary {@code .doc} format (via {@link HWPFDocument}) and the
 * OOXML {@code .docx} format (via {@link XWPFDocument}). Each conversion returns a
 * {@link WordInfoBean} carrying the generated HTML, the extracted plain text, the
 * document's base name and the paths of the images written to disk.
 *
 * <p>program: wordtest
 *
 * @author xiaominge (created 2019-06-19 18:01)
 */

public class WordToHtmlUtils {

    /** Regex matching the control characters (U+0000 to U+001F, backspace included) removed from extracted text. */
    private static final String CONTROL_CHARS_REGEX = "[\\u0000-\\u001f\b]";

    private static final String DOC_EXTENSION = ".doc";
    private static final String DOCX_EXTENSION = ".docx";

    /**
     * Converts a {@code .docx} document to XHTML and stores its images on disk.
     *
     * <p>Images are handed to an {@link ImageManager} rooted at {@code imagePath} with the
     * document's base name as sub-directory. Files already present in that sub-directory are
     * removed first, so re-importing the same document replaces its previous images.
     *
     * @param inputStream stream of the source document, e.g. {@code file.getInputStream()}
     * @param fileName    original name of the uploaded file, e.g.
     *                    {@code multipartFile.getOriginalFilename()}; must end with
     *                    {@code .docx} (any letter case)
     * @param imagePath   directory under which the image sub-directory is created
     * @return a bean with file type {@code 2}, the base name, HTML, plain text and image paths
     * @throws TypeNotPresentException  if {@code fileName} does not end with {@code .docx}
     * @throws IllegalArgumentException if {@code imagePath} is null or the file name has no
     *                                  usable base name (empty, {@code .} or {@code ..})
     * @throws Exception                if the document cannot be read or converted
     */
    public static WordInfoBean docxToHtml(InputStream inputStream, String fileName, String imagePath) throws Exception {
        if (!hasExtension(fileName, DOCX_EXTENSION)) {
            // Unsupported file format.
            throw new TypeNotPresentException("类型错误 只能解析docx格式的word", new Exception());
        }
        String fileNoendName = baseNameWithoutExtension(fileName, DOCX_EXTENSION.length());
        String imageRoot = withTrailingSeparator(imagePath, "/");
        String imageRealDir = imageRoot + fileNoendName;

        // Closing the document releases the underlying package once everything has been extracted.
        try (XWPFDocument document = new XWPFDocument(inputStream)) {
            IXWPFConverter<XHTMLOptions> converter = XHTMLConverter.getInstance();
            XHTMLOptions options = XHTMLOptions.create();

            // Only clear the previous import after the new document parsed successfully.
            wordimagesfiles(imageRealDir);

            // The image sub-directory must sit next to the generated HTML, otherwise the
            // image links in the HTML cannot be resolved.
            options.setImageManager(new ImageManager(new File(imageRoot), fileNoendName));
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            converter.convert(document, out, options);

            // Collect what was written so the caller can persist the locations.
            // NOTE(review): File.list() is not recursive; if the converter stores images in
            // nested folders only the top-level entries are reported here - confirm.
            List<String> imageRealPaths = new ArrayList<>();
            String[] images = new File(imageRealDir).list();
            if (images != null) {
                for (String image : images) {
                    imageRealPaths.add(imageRealDir + File.separator + image);
                }
            }

            WordInfoBean wordInfoBean = new WordInfoBean();
            wordInfoBean.setFileName(fileNoendName);
            wordInfoBean.setImagePath(imageRealPaths);
            wordInfoBean.setFileType(2);
            // NOTE(review): decoded with the platform default charset - confirm it matches
            // the encoding the converter writes.
            wordInfoBean.setContextHtml(out.toString());
            wordInfoBean.setContextText(new XWPFWordExtractor(document).getText());
            return wordInfoBean;
        }
    }

    /**
     * Converts a binary {@code .doc} document to HTML and stores its pictures on disk.
     *
     * <p>The header story range and the footnote range are deleted from the in-memory
     * document before text extraction and conversion. Pictures are written to
     * {@code imagePath/<base name>/}; existing files in that directory are removed first
     * when the document contains at least one picture.
     *
     * @param inputStream stream of the {@code .doc} document
     * @param fileName    original file name, e.g. {@code file.getOriginalFilename()}; must end
     *                    with {@code .doc} (any letter case)
     * @param imagePath   directory under which the picture sub-directory is created
     * @return a bean with file type {@code 1}, the base name, HTML, plain text and picture paths
     * @throws TypeNotPresentException  if {@code fileName} does not end with {@code .doc}
     * @throws IllegalArgumentException if {@code imagePath} is null or the file name has no
     *                                  usable base name (empty, {@code .} or {@code ..})
     * @throws Exception                if the document cannot be read, converted or serialized
     */
    public static WordInfoBean docToHtml(InputStream inputStream, String fileName, String imagePath) throws Exception {
        // Validate before doing any parsing work.
        if (!hasExtension(fileName, DOC_EXTENSION)) {
            throw new TypeNotPresentException("类型错误 只能解析doc格式的word", new Exception());
        }
        String baseName = baseNameWithoutExtension(fileName, DOC_EXTENSION.length());
        String pictureDir = withTrailingSeparator(imagePath, File.separator) + baseName;

        try (HWPFDocument wordDocument = new HWPFDocument(inputStream)) {
            // Drop the header story.
            Range headerStoryRange = wordDocument.getHeaderStoryRange();
            headerStoryRange.delete();
            // Drop the footnotes.
            Range footnoteRange = wordDocument.getFootnoteRange();
            footnoteRange.delete();

            // Plain text of what is left.
            String documentText = wordDocument.getDocumentText();

            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            // Pictures are written separately below; the HTML only references the suggested name.
            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
                    return suggestedName;
                }
            });
            wordToHtmlConverter.processDocument(wordDocument);

            List<String> wordImagesPathList =
                    savePictures(wordDocument.getPicturesTable().getAllPictures(), pictureDir);

            Document htmlDocument = wordToHtmlConverter.getDocument();
            ByteArrayOutputStream out = new ByteArrayOutputStream();

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

            WordInfoBean wordInfoBean = new WordInfoBean();
            wordInfoBean.setContextText(documentText);
            // Decode with the same charset the serializer was told to write.
            wordInfoBean.setContextHtml(out.toString("UTF-8"));
            wordInfoBean.setFileType(1);
            wordInfoBean.setImagePath(wordImagesPathList);
            wordInfoBean.setFileName(baseName);
            return wordInfoBean;
        }
    }

    /**
     * Writes every picture into {@code pictureDir} and returns the paths that were written.
     * A picture whose target file cannot be opened is skipped (best effort) and not reported.
     */
    private static List<String> savePictures(List<Picture> pics, String pictureDir) throws IOException {
        List<String> savedPaths = new ArrayList<>();
        if (pics == null || pics.isEmpty()) {
            return savedPaths;
        }
        // Prepare (or empty) the target directory only when there is something to store.
        wordimagesfiles(pictureDir);
        for (Picture pic : pics) {
            String realImagePath = pictureDir + File.separator + pic.suggestFullFileName();
            try (OutputStream imageOut = new FileOutputStream(realImagePath)) {
                pic.writeImageContent(imageOut);
                savedPaths.add(realImagePath);
            } catch (FileNotFoundException e) {
                // Best effort: keep converting even if a single picture cannot be stored.
                e.printStackTrace();
            }
        }
        return savedPaths;
    }

    /**
     * Prepares the image directory: on an update the plain files of the previous import are
     * removed, on a first import the directory is created.
     *
     * @param imagesPath path of the directory that will hold the images
     */
    private static void wordimagesfiles(String imagesPath) {
        File dir = new File(imagesPath);
        if (dir.isDirectory()) {
            File[] children = dir.listFiles();
            if (children != null) {
                for (File child : children) {
                    // Sub-directories are left untouched.
                    if (child.isFile()) {
                        child.delete();
                    }
                }
            }
            return;
        }
        if (dir.exists()) {
            // A plain file occupies the name: remove it so the directory can be created.
            dir.delete();
        }
        dir.mkdirs();
    }

    /** Returns true when {@code fileName} is non-null and ends with {@code extension}, ignoring case. */
    private static boolean hasExtension(String fileName, String extension) {
        return fileName != null
                && fileName.regionMatches(true, fileName.length() - extension.length(),
                        extension, 0, extension.length());
    }

    /**
     * Strips the trailing extension and any directory components from an uploaded file name,
     * so the result can be used as a single directory name below the image root.
     */
    private static String baseNameWithoutExtension(String fileName, int extensionLength) {
        String stem = fileName.substring(0, fileName.length() - extensionLength);
        int lastSeparator = Math.max(stem.lastIndexOf('/'), stem.lastIndexOf('\\'));
        String name = stem.substring(lastSeparator + 1);
        // These names would resolve to the image root itself or to its parent directory.
        if (name.isEmpty() || name.equals(".") || name.equals("..")) {
            throw new IllegalArgumentException("Invalid Word file name: " + fileName);
        }
        return name;
    }

    /** Returns {@code dir} unchanged if it already ends with a separator, otherwise {@code dir + separator}. */
    private static String withTrailingSeparator(String dir, String separator) {
        if (dir == null) {
            throw new IllegalArgumentException("imagePath must not be null");
        }
        if (dir.endsWith("/") || dir.endsWith(File.separator)) {
            return dir;
        }
        return dir + separator;
    }

    /**
     * Converts several uploaded Word files.
     *
     * @param iamgePath directory under which each document's image sub-directory is created
     * @param files     uploaded {@code .doc} / {@code .docx} files
     * @return one bean per converted file, in input order, with control characters removed
     *         from the plain text
     * @throws Exception if a file has an unsupported type or cannot be converted
     */
    public static List<WordInfoBean> readWord(String iamgePath, MultipartFile... files) throws Exception {
        List<WordInfoBean> wordInfoBeans = new ArrayList<>();
        for (MultipartFile file : files) {
            WordInfoBean wordInfoBean = readWord(file, iamgePath);
            // Null only if the type-error helper returned instead of throwing.
            if (wordInfoBean != null) {
                wordInfoBeans.add(wordInfoBean);
            }
        }
        return wordInfoBeans;
    }

    /**
     * Converts a single uploaded Word file, choosing the converter from the file extension.
     *
     * @param file      uploaded {@code .doc} or {@code .docx} file
     * @param iamgePath directory under which the document's image sub-directory is created
     * @return the conversion result with control characters removed from the plain text
     * @throws Exception if the file has an unsupported type or cannot be converted
     */
    public static WordInfoBean readWord(MultipartFile file, String iamgePath) throws Exception {
        String originalFilename = file.getOriginalFilename();
        WordInfoBean wordInfoBean = null;

        if (hasExtension(originalFilename, DOC_EXTENSION)) {
            try (InputStream in = file.getInputStream()) {
                wordInfoBean = docToHtml(in, originalFilename, iamgePath);
            }
        } else if (hasExtension(originalFilename, DOCX_EXTENSION)) {
            try (InputStream in = file.getInputStream()) {
                wordInfoBean = docxToHtml(in, originalFilename, iamgePath);
            }
        } else {
            ParameterRuntimeException.throwException("类型错误:只能解析 doc或者docx");
        }

        if (wordInfoBean != null) {
            // Remove control characters (line breaks etc.) from the extracted text.
            wordInfoBean.setContextText(wordInfoBean.getContextText().replaceAll(CONTROL_CHARS_REGEX, ""));
        }
        return wordInfoBean;
    }
}
